#!/bin/bash
proj=psoria71

mkdir -p ${proj}

## process frag -------
python /home/gjsx/micromamba/envs/maestro/lib/python3.8/site-packages/MAESTRO/scATAC_FragmentReshape.py \
-F fragments.71.rev.tsv \
-O ${proj}/fragments_corrected_dedup_count.tsv

bgzip -@16 -c ${proj}/fragments_corrected_dedup_count.tsv > ${proj}/fragments_corrected_dedup_count.tsv.gz
tabix -p bed ${proj}/fragments_corrected_dedup_count.tsv.gz

## qcstat promoter --------
bedtools intersect -wa -a ${proj}/fragments_corrected_dedup_count.tsv \
-b /home/gjsx/micromamba/envs/maestro/lib/python3.8/site-packages/MAESTRO/annotations/GRCh38_promoter.bed \
-u > ${proj}/fragments_promoter.tsv

sort -k4,4 -V ${proj}/fragments_promoter.tsv > ${proj}/fragments_promoter_sortbybarcode.tsv

bedtools groupby -i ${proj}/fragments_promoter_sortbybarcode.tsv -g 4 -c 5 -o sum > ${proj}/singlecell_promoter.txt

## all peak call ------
macs2 callpeak -f BEDPE -g hs --outdir ${proj} -n macs2 -B \
-q 0.05 --nomodel --extsize=50 --SPMR --keep-dup all \
-t ${proj}/fragments_corrected_dedup_count.tsv

## qcstat mapped ------
sort -k4,4 -V ${proj}/fragments_corrected_dedup_count.tsv > ${proj}/fragments_corrected_count_sortedbybarcode.tsv

bedtools groupby -i ${proj}/fragments_corrected_count_sortedbybarcode.tsv -g 4 -c 5 -o sum > ${proj}/singlecell_mapped.txt

## qcstat_singlecell ----
sort -k1,1 ${proj}/singlecell_mapped.txt > ${proj}/singlecell_mapped_sortbybarcode.txt

sort -k1,1 ${proj}/singlecell_promoter.txt > ${proj}/singlecell_promoter_sortbybarcode.txt

join --nocheck-order -t $'\t' -a1 -e'0' -o'1.1 1.2 2.2' -1 1 -2 1 \
${proj}/singlecell_mapped_sortbybarcode.txt ${proj}/singlecell_promoter_sortbybarcode.txt \
> ${proj}/singlecell.txt

## merge peak -----
cat ${proj}/macs2_peaks.narrowPeak | sort -k1,1 -k2,2n | cut -f 1-4 \
> ${proj}/macs2_cat_peaks.bed

mergeBed -i ${proj}/macs2_cat_peaks.bed | grep -v '_' | grep -v 'chrEBV' > ${proj}/macs2_final_peaks.bed

rm ${proj}/macs2_cat_peaks.bed

## bdg2bw (optional) ------
bedtools slop -i ${proj}/macs2_treat_pileup.bdg \
-g /home/gjsx/micromamba/envs/maestro/lib/python3.8/site-packages/MAESTRO/annotations/GRCh38_chrom_len.txt -b 0 |\
/home/gjsx/micromamba/envs/maestro/lib/python3.8/site-packages/MAESTRO/utils/bedClip stdin \
/home/gjsx/micromamba/envs/maestro/lib/python3.8/site-packages/MAESTRO/annotations/GRCh38_chrom_len.txt \
${proj}/macs2_sort.clip

sort -k1,1 -k2,2n ${proj}/macs2_sort.clip > ${proj}/macs2_sort.bdg

/home/gjsx/micromamba/envs/maestro/lib/python3.8/site-packages/MAESTRO/utils/bedGraphToBigWig \
${proj}/macs2_sort.bdg \
/home/gjsx/micromamba/envs/maestro/lib/python3.8/site-packages/MAESTRO/annotations/GRCh38_chrom_len.txt \
${proj}/macs2.bw

rm ${proj}/macs2_sort.bdg ${proj}/macs2_sort.clip

## qcstat-bulk
awk '{print $3-$2}' ${proj}/fragments_corrected_dedup_count.tsv > ${proj}/psoria_frag.bed
grep 'chrM' ${proj}/fragments_corrected_dedup_count.tsv -c >> ${proj}/flagstat.txt || true
grep -v 'chrM' ${proj}/fragments_corrected_dedup_count.tsv | bedtools intersect -wa -a - -b /home/gjsx/micromamba/envs/maestro/lib/python3.8/site-packages/MAESTRO/annotations/GRCh38_promoter.bed -u | wc -l >> ${proj}/flagstat.txt || true
grep -v 'chrM' ${proj}/fragments_corrected_dedup_count.tsv | bedtools intersect -wa -a - -b ${proj}/macs2_peaks.narrowPeak -u | wc -l >> ${proj}/flagstat.txt || true

## qc plot
## NOTE: need to execute out of Rstudio
Rscript /home/gjsx/micromamba/envs/maestro/lib/python3.8/site-packages/MAESTRO/R/scATACseq_qc.R \
--fragment psoria_frag.bed --singlestat singlecell.txt --countcutoff 1000 --fripcutoff 0.2 --prefix qc --outdir ${proj}/qc

## count peak
## default 8 cores, cost 9~30 min
MAESTRO scatac-peakcount --binary --peak ${proj}/macs2_final_peaks.bed \
--fragment ${proj}/fragments_corrected_dedup_count.tsv --barcode ${proj}/qc_scATAC_validcells.txt \
--species GRCh38 --cores 32 --directory ${proj} --outprefix maestro

## filter mtx
MAESTRO scatac-qc --format h5 --peakcount ${proj}/maestro_peak_count.h5 \
--peak-cutoff 100 --cell-cutoff 50 --directory ${proj} --outprefix maestro

## gene scoring
MAESTRO scatac-genescore --format h5 --peakcount ${proj}/maestro_filtered_peak_count.h5 \
--species GRCh38 --directory ${proj} --outprefix maestro --model Enhanced

## R analysis
Rscript /home/gjsx/micromamba/envs/maestro/lib/python3.8/site-packages/MAESTRO/R/scATACseq_pipe.R \
--peakcount maestro_filtered_peak_count.h5 --rpmatrix maestro_gene_score.h5 \
--species GRCh38 --annotation True --method peak-based \
--signature human.immune.CIBERSORT --gigglelib /home/gjsx/append-ssd/maestro/giggle.all \
--fragment fragments_corrected_dedup_count.tsv.gz \
--outdir ${proj} --thread 1

## report
## Need QC images in their default path
python /home/gjsx/micromamba/envs/maestro/lib/python3.8/site-packages/MAESTRO/scATAC_HTMLReport_multi.py \
--directory ${proj} --outprefix report \
--input-path /home/supervisor/mist/gj_seekgene_crc/ctrl72 --species GRCh38 \
--platform 10x-genomics --input-format fastq --mapping chromap
